[21]:
# Load all the libraries required for the display of the plots
import sys
import ast
import plotly
import plotly.graph_objs as go
import plotly.figure_factory as ff
import numpy as np
import pandas as pd
import networkx as nx
from math import log, ceil
sys.path.insert(0, '../') # required to import modules from the parent directory
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=False)
colors = plotly.colors.DEFAULT_PLOTLY_COLORS.copy()
Plot Cluster Size of a Single Cluster¶
Here, we will focus on displaying how the cluster size changes for the cluster that contains a single gene.
[22]:
# We need both the gene and cluster information for a series of readouts
readouts = {
'Unweighted': '_unw_', # unweighted
'Aβ40': '_wgt_ab40_',
'Aβ42': '_wgt_ab42_',
'sAPPα': '_wgt_sappa_',
'sAPPβ': '_wgt_sappb_',
'Viability': '_wgt_via_',
}
[23]:
# We will keep the information in two different DataFrames, one for Cluster Data and one for Gene Data
geneData = pd.DataFrame(columns=['GENEID'])
clusterData = pd.DataFrame(columns=['clu_id'])
for readout in readouts.keys():
# merge information that says to which cluster each gene belongs
geneData = geneData.merge(pd.read_csv('../results/chuang'+readouts[readout]+'gene.csv', usecols=['GENEID', 'clu_size', 'clu_id']).rename(columns={'clu_id':readout, 'clu_size': readout+'Size'}), how='outer', on='GENEID')
# merge information that says what are the genes that belong to each cluster
clusterData = clusterData.merge(pd.read_csv('../results/chuang'+readouts[readout]+'cluster.csv').rename(columns={'clu_size':readout+'Size', 'clu_genes':readout+'Genes'}), how='outer', on='clu_id')
geneData.set_index(keys='GENEID', inplace=True)
clusterData.set_index(keys='clu_id', inplace=True)
[24]:
# Select the target Gene
gene = int(input())
# These are the cluster IDs of the clusters containing the target gene in each of the
# different results
cluster_ids = geneData.loc[:,~geneData.columns.str.contains('.*Size', case=False)].loc[gene]
cluster_ids
[24]:
Unweighted 332
Aβ40 33
Aβ42 74
sAPPα 74
sAPPβ 34
Viability 0
Name: 351, dtype: int64
[25]:
# We add a column to the dataframe to identify the genes that are part of the hsa05010 pathway (Alzheimer's Disease)
df = pd.read_table('../data/KEGG_hsa05010.tsv', header=None)
df.columns=['GENEID']
geneData['inPathway'] = colors[0]
geneData.loc[geneData.index.isin(df.GENEID.values),'inPathway'] = colors[1]
[26]:
# Do a scatter plot of the cluster sizes
# Define the layout of the graph
fig = go.Figure(
layout = dict(
title = 'Size of cluster containing gene '+str(gene)+' per readout',
yaxis_title = 'Cluster Size',
xaxis_title = 'siRNA Readout',
showlegend = True,
plot_bgcolor = 'white',
xaxis_gridcolor = 'lightGray',
yaxis_linecolor = 'black',
yaxis_gridcolor = 'lightGray',
yaxis_zerolinecolor = 'lightGray',
)
)
# Get a list of only the weighted readouts
wgt = (list(readouts.keys()))
wgt.remove('Unweighted')
# Add a horizontal line to trace the size of the unweighted cluster
fig.add_trace(go.Scatter(
x = wgt,
y = [geneData.at[gene, 'UnweightedSize']]*len(wgt),
mode = 'lines+markers',
line_color = colors[1],
line_dash = 'dash',
name = 'Unweighted Cluster Size',
))
# And a different trace trace for the size of the weighted clusters
fig.add_trace(go.Scatter(
x = wgt,
y = geneData.filter(regex='(?=.*Size$)').drop(columns=['UnweightedSize']).loc[gene].values,
mode = 'lines+markers',
line_color = colors[0],
name = 'Weighted Cluster Size',
))
fig
[27]:
# Save the corresponding Image
fig.write_image(file='../images/chuang singleClusterSize APP.png', format='png')
Plot Clusters with Pathway highlight¶
Layout the clusters independently and use color to highlight those genes that are also part of the target pathway
[28]:
# Load all the edges of the network
edgeData = pd.read_table('../data/network1_chuang.txt', header=None)
edgeData.columns = ['x','y','xsymbol','ysymbol']
edgeData
[28]:
| x | y | xsymbol | ysymbol | |
|---|---|---|---|---|
| 0 | 6927 | 55281 | HNF1A | TMEM140 |
| 1 | 164 | 8906 | AP1G1 | AP1G2 |
| 2 | 3689 | 11082 | ITGB2 | ESM1 |
| 3 | 57326 | 151871 | PBXIP1 | DPPA2 |
| 4 | 5336 | 23236 | PLCG2 | PLCB1 |
| ... | ... | ... | ... | ... |
| 56794 | 3172 | 55798 | HNF4A | METTL2B |
| 56795 | 5595 | 5706 | MAPK3 | PSMC6 |
| 56796 | 6233 | 9491 | RPS27A | PSMF1 |
| 56797 | 6868 | 10459 | ADAM17 | MAD2L2 |
| 56798 | 2064 | 2885 | ERBB2 | GRB2 |
56799 rows × 4 columns
[29]:
# Add symbols to the DataFrame containing GeneData
df = pd.DataFrame()
df['geneid'] = pd.concat([edgeData['x'],edgeData['y']])
df['symbol'] = pd.concat([edgeData['xsymbol'],edgeData['ysymbol']])
df = df.drop_duplicates()
geneData['symbol'] = df.set_index('geneid')['symbol']
[30]:
# Select the first readout to draw and generate the position of its nodes
read1 = 'Unweighted'
nodes1 = ast.literal_eval(clusterData.at[cluster_ids[read1], read1+'Genes'])
edges1 = edgeData.loc[edgeData['x'].isin(nodes1)].loc[edgeData['y'].isin(nodes1)]
# Create a Networkx Graph with the edges of the network
e = []
edges1.apply(lambda x: e.append((x.x, x.y)), axis=1)
G = nx.Graph()
G.add_nodes_from(nodes1)
G.add_edges_from(e)
# Layout the nodes of the graph, using networkx layout algorithms or layout options
# available through the graphviz library ( prog is an enum from ['dot', 'neato', 'fdp', 'sfdp', 'twopi', 'circo'])
# CAUTION the positioning of the nodes can take several minutes depending on the algorithm used to define the layout]
# Layout using graphviz library
layout = nx.nx_agraph.graphviz_layout(G, prog='sfdp', args='-Gratio=auto -Goverlap=scale')
positions1 = pd.DataFrame.from_dict(layout, orient='index', columns = ['px','py'])
edges1 = edges1.merge(positions1, left_on='x', right_index=True, how='left')
edges1 = edges1.merge(positions1, left_on='y', right_index=True, how='left')
edges1['none'] = 'none'
/home/topeins/anaconda3/envs/mcl/lib/python3.6/site-packages/pygraphviz/agraph.py:1341: RuntimeWarning:
Error: remove_overlap: Graphviz not built with triangulation library
[31]:
# Trace that draws the edges of the first cluster
fig1 = go.Figure()
fig1.add_trace(go.Scatter(
x = pd.concat([edges1['px_x'],edges1['px_y'],edges1['none']]).sort_index(kind='merge').values.tolist(),
y = pd.concat([edges1['py_x'],edges1['py_y'],edges1['none']]).sort_index(kind='merge').values.tolist(),
mode = 'lines',
line_width = 0.5,
line_color = '#888',
))
# Trace that draws the nodes of the first cluster
fig1.add_trace(go.Scatter(
x = positions1['px'].values.tolist(),
y = positions1['py'].values.tolist(),
mode = 'markers+text',
text = geneData.loc[nodes1,'symbol'].values.tolist(),
textposition = 'bottom center',
marker_size = 10,
marker_color = geneData.loc[nodes1,'inPathway'].values.tolist(),
))
[32]:
# Do the same for a second readout to draw
read2 = 'Aβ42' # 'Aβ40', 'Aβ42', 'sAPPα', 'sAPPβ', 'Viability',
nodes2 = ast.literal_eval(clusterData.at[cluster_ids[read2], read2+'Genes'])
edges2 = edgeData.loc[edgeData['x'].isin(nodes2)].loc[edgeData['y'].isin(nodes2)]
# Create a Networkx Graph with the edges of the network
e = []
edges2.apply(lambda x: e.append((x.x, x.y)), axis=1)
G = nx.Graph()
G.add_nodes_from(nodes2)
G.add_edges_from(e)
# Layout the nodes of the graph, using networkx layout algorithms or layout options
# available through the graphviz library ( prog is an enum from ['dot', 'neato', 'fdp', 'sfdp', 'twopi', 'circo'])
# CAUTION the positioning of the nodes can take several minutes depending on the algorithm used to define the layout]
# Layout using graphviz library
layout = nx.nx_agraph.graphviz_layout(G, prog='sfdp', args='-Gratio=auto -Goverlap=scale')
positions2 = pd.DataFrame.from_dict(layout, orient='index', columns = ['px','py'])
edges2 = edges2.merge(positions2, left_on='x', right_index=True, how='left')
edges2 = edges2.merge(positions2, left_on='y', right_index=True, how='left')
edges2['none'] = 'none'
[33]:
# Trace that draws the edges of the first cluster
fig2 = go.Figure()
fig2.add_trace(go.Scatter(
x = pd.concat([edges2['px_x'],edges2['px_y'],edges2['none']]).sort_index(kind='merge').values.tolist(),
y = pd.concat([edges2['py_x'],edges2['py_y'],edges2['none']]).sort_index(kind='merge').values.tolist(),
mode = 'lines',
line_width = 0.5,
line_color = '#888',
))
# Trace that draws the nodes of the first cluster
fig2.add_trace(go.Scatter(
x = positions2['px'].values.tolist(),
y = positions2['py'].values.tolist(),
mode = 'markers+text',
text = geneData.loc[nodes2,'symbol'].values.tolist(),
textposition = 'bottom center',
marker_size = 10,
marker_color = geneData.loc[nodes2,'inPathway'].values.tolist(),
))
[34]:
# We plot the different clusters (one per readout) as a Figure with several sub-plots
fig = plotly.subplots.make_subplots(
rows = 1,
cols = 2,
subplot_titles = ([read1, read2]),
vertical_spacing = 0.05,
horizontal_spacing = 0.05
)
fig.update_layout(
title = 'Clusters containing Gene '+geneData.loc[gene,'symbol'],
width = 1600,
height = 800,
plot_bgcolor = 'white',
showlegend = False,
margin = dict( t = 70, b = 50, r = 50, l = 50),
xaxis_showticklabels = False,
yaxis_showticklabels = False,
xaxis2_showticklabels = False,
yaxis2_showticklabels = False,
)
fig.add_trace(fig1.data[0],1,1)
fig.add_trace(fig1.data[1],1,1)
fig.add_trace(fig2.data[0],1,2)
fig.add_trace(fig2.data[1],1,2)
fig
Plot the Cluster’s change in Composition (as network)¶
Here we intend to show how the genes that make up the cluster containing a target gene change.
[36]:
# Add a new column to the genes DataFrame, where we will store the number of times
# any given gene is part of the cluster containing the target Gene (APP)
geneData['count'] = 0
# iterate through the clusters and modify the count value of the member genes
for i, cid in cluster_ids.items():
c = ast.literal_eval(clusterData.at[cid, i+'Genes'])
geneData.loc[c, 'count'] += 1
# Only the genes that are part of the cluster containing the target gene at least once
# are the ones with a count value >= than 1
geneData[geneData['count']>=1]
# Add a column to define the opacity used to draw any particular gene, function of its count number
geneData['opacity'] = geneData['count']/len(cluster_ids)
geneData
[36]:
| UnweightedSize | Unweighted | Aβ40Size | Aβ40 | Aβ42Size | Aβ42 | sAPPαSize | sAPPα | sAPPβSize | sAPPβ | ViabilitySize | Viability | inPathway | symbol | count | opacity | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GENEID | ||||||||||||||||
| 6927 | 1876 | 0 | 175 | 0 | 215 | 0 | 1626 | 0 | 266 | 0 | 2689 | 0 | rgb(31, 119, 180) | HNF1A | 1 | 0.166667 |
| 164 | 13 | 1 | 6 | 1 | 5 | 1 | 12 | 1 | 11 | 1 | 5 | 1 | rgb(31, 119, 180) | AP1G1 | 0 | 0.000000 |
| 3689 | 10 | 2 | 96 | 2 | 81 | 2 | 42 | 2 | 9 | 2 | 191 | 2 | rgb(31, 119, 180) | ITGB2 | 0 | 0.000000 |
| 57326 | 1876 | 0 | 1173 | 3 | 1444 | 3 | 1626 | 0 | 1488 | 3 | 2689 | 0 | rgb(31, 119, 180) | PBXIP1 | 1 | 0.166667 |
| 5336 | 88 | 3 | 46 | 4 | 41 | 4 | 9 | 3 | 27 | 4 | 41 | 3 | rgb(31, 119, 180) | PLCG2 | 0 | 0.000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4808 | 2 | 1611 | 11 | 511 | 13 | 507 | 14 | 503 | 23 | 494 | 11 | 421 | rgb(31, 119, 180) | NHLH2 | 0 | 0.000000 |
| 6474 | 2 | 1658 | 2 | 1528 | 2 | 1472 | 2 | 1511 | 2 | 1454 | 2 | 1296 | rgb(31, 119, 180) | SHOX2 | 0 | 0.000000 |
| 10957 | 15 | 342 | 16 | 177 | 12 | 314 | 13 | 318 | 20 | 145 | 2689 | 0 | rgb(31, 119, 180) | PNRC1 | 1 | 0.166667 |
| 8780 | 1876 | 0 | 1173 | 3 | 1444 | 3 | 1626 | 0 | 1488 | 3 | 2689 | 0 | rgb(31, 119, 180) | RIOK3 | 1 | 0.166667 |
| 2193 | 1876 | 0 | 1 | 1599 | 1 | 1545 | 268 | 9 | 1 | 1528 | 211 | 8 | rgb(31, 119, 180) | FARSA | 0 | 0.000000 |
10906 rows × 16 columns
[37]:
# Together with the individual genes, in order to display the clusters as a graph, we need to know the
# edges between the different nodes
edgeData = pd.read_table('../data/network1_chuang.txt', header=None)
edgeData.columns = ['x','y','xsymbol','ysymbol']
# filter out the edges where both end-points are not part of any of the clusters containing the target gene
genes = geneData[geneData['count']>=1].index.tolist()
edgeData = edgeData[edgeData.apply(lambda x: (x.x in genes) and (x.y in genes), axis=1)]
edgeData
[37]:
| x | y | xsymbol | ysymbol | |
|---|---|---|---|---|
| 0 | 6927 | 55281 | HNF1A | TMEM140 |
| 9 | 5985 | 5981 | RFC5 | RFC1 |
| 12 | 3175 | 80185 | ONECUT1 | TTI2 |
| 33 | 3172 | 50 | HNF4A | ACO2 |
| 49 | 164656 | 2335 | TMPRSS6 | FN1 |
| ... | ... | ... | ... | ... |
| 56763 | 5721 | 7316 | PSME2 | UBC |
| 56766 | 3172 | 2252 | HNF4A | FGF7 |
| 56776 | 5694 | 5695 | PSMB6 | PSMB7 |
| 56777 | 4191 | 5315 | MDH2 | PKM |
| 56794 | 3172 | 55798 | HNF4A | METTL2B |
6474 rows × 4 columns
[38]:
# Create a Networkx Graph with the edges of the network
edges = []
edgeData.apply(lambda x: edges.append((x.x, x.y)), axis=1)
G = nx.Graph()
G.add_edges_from(edges)
# Layout the nodes of the graph, using networkx layout algorithms or layout options
# available through the graphviz library ( prog is an enum from ['dot', 'neato', 'fdp', 'sfdp', 'twopi', 'circo'])
# CAUTION the positioning of the nodes can take several minutes depending on the algorithm used to define the layout
# Layout using graphviz library
# layout = nx.nx_agraph.graphviz_layout(G, prog='dot', args='-Gsize="10,10" -Gratio=fill -Gnodesep=0.1 -Granksep=0.1 -Grankdir=BT')
# Layout using force-directed algorithm implemented in networkx
# layout = nx.kamada_kawai_layout(G)#,scale=2)
# Layout using the shell algorithm implemented in networkx
shells = []
# max number of clusters to which a gene can belong
m = geneData[['count']].max()[0]
for i in range(m,0,-1):
# if the amount of genes in any given shell is too large, split it into several shells
s = geneData[geneData['count']== i].index.tolist()
# max number of genes in the current shell
while len(s) > 0:
n = 50*len(shells)
if len(s) > n:
shells.append(s[:n])
del s[:n]
else:
shells.append(s)
s = []
layout = nx.shell_layout(G, shells, scale=len(shells))
print(len(shells))
# layout
13
[39]:
# Define a DataFrame for the list of node positions, and merge this data into the
# geneData DataFrame
layoutData = pd.DataFrame.from_dict(layout, orient='index', columns = ['px','py'])
geneData = geneData.merge(layoutData, how = 'left', right_index = True, left_index = True)
geneData
layoutData.max()
[39]:
px 13.000000
py 12.995393
dtype: float64
[40]:
# For convenience, also merge the positional information of the nodes to the edgeData DataFrame
edgeData = edgeData.merge(layoutData[['px','py']], how = 'left', right_index=True, left_on='x')
edgeData = edgeData.merge(layoutData[['px','py']], how = 'left', right_index=True, left_on='y')
edgeData['none'] = None
edgeData
[40]:
| x | y | xsymbol | ysymbol | px_x | py_x | px_y | py_y | none | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 6927 | 55281 | HNF1A | TMEM140 | -0.843757 | 6.948962 | -10.303911 | 3.850899 | None |
| 9 | 5985 | 5981 | RFC5 | RFC1 | -1.134009 | 6.907534 | -3.401885 | -6.117775 | None |
| 12 | 3175 | 80185 | ONECUT1 | TTI2 | -1.278421 | 6.882270 | -10.397429 | 3.590745 | None |
| 33 | 3172 | 50 | HNF4A | ACO2 | -1.849830 | 6.751158 | -10.441730 | 3.459807 | None |
| 49 | 164656 | 2335 | TMPRSS6 | FN1 | 0.723220 | 5.956253 | -1.647186 | 5.769469 | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 56763 | 5721 | 7316 | PSME2 | UBC | -1.990811 | 6.710937 | 6.187679 | -3.273016 | None |
| 56766 | 3172 | 2252 | HNF4A | FGF7 | -1.849830 | 6.751158 | -8.489806 | 2.987171 | None |
| 56776 | 5694 | 5695 | PSMB6 | PSMB7 | 6.853987 | 1.422275 | -2.101660 | -7.719004 | None |
| 56777 | 4191 | 5315 | MDH2 | PKM | 3.597208 | -6.005006 | 2.970659 | -7.428000 | None |
| 56794 | 3172 | 55798 | HNF4A | METTL2B | -1.849830 | 6.751158 | 4.935251 | 0.802056 | None |
6474 rows × 9 columns
[41]:
# We plot the different clusters (one per readout) as a Figure with several sub-plots
fig = plotly.subplots.make_subplots(
rows = 3, # there are 5 subplots, so we define a grid of 3x2
cols = 2,
shared_xaxes = True,
shared_yaxes = True,
subplot_titles = (['subtitle']*len(cluster_ids)),
vertical_spacing = 0.05,
horizontal_spacing = 0.05,
)
# initial sub-plot data
i = 1
j = 1
a = 'a'
# We add two traces for each cluster, the first one draws only the edges of the cluster
# whilst the second draws the nodes
for readout, id in cluster_ids.items():
# Generate the data required for the traces
# Get the list of genes in the current cluster
cluster = ast.literal_eval(clusterData.at[id, readout+'Genes'])
# Define a DataFrame that contains only the interactions that are within the cluster
df = edgeData[edgeData.apply(lambda x: (x.x in cluster) and (x.y in cluster), axis=1)]
# Trace that draws the corresponding edges
fig.append_trace(
go.Scatter(
x = pd.concat([df['px_x'],df['px_y'],df['none']]).sort_index(kind='merge').values.tolist(),
y = pd.concat([df['py_x'],df['py_y'],df['none']]).sort_index(kind='merge').values.tolist(),
mode = 'lines',
line_width = 0.5,
line_color = '#888',
showlegend = False,
),i,j,
)
# Define a DataFrame that contains only the nodes that are within the cluster
df = geneData.loc[cluster]
# Trace that draws the nodes
fig.append_trace(
go.Scatter(
x = df['px'].values.tolist(),
y = df['py'].values.tolist(),
mode = 'markers',
textposition = 'bottom center',
marker = dict(
color = colors[0],
opacity = df['opacity'].tolist(),
colorscale = plotly.colors.sequential.YlOrRd,
),
showlegend = False,
), i,j,
)
# Add the subtitle for the current sub-plot
fig['layout']['annotations'][(2*i+j)-3]['text'] = '('+a+') '+readout
# update the positioning of the next plot
j += 1
a = chr(ord(a)+1)
if j > 2:
i += 1
j = 1
fig.update_layout(
title = 'Clusters containing Gene '+str(gene)+' per weighting scheme',
showlegend = False,
width = 1000,
height = 1000,
margin = dict( t = 70, b = 50, r = 50, l = 50),
plot_bgcolor = 'white',
)
fig.update_xaxes(
showticklabels = False,
)
fig.update_yaxes(
showticklabels = False,
range = [-15,15],
)
# Add a color scale to the figure
n = geneData['count'].max()
scale = [[0, 'rgba(31, 119, 180, '+str(round(1/n,2))+')']]
for i in range(1,n):
scale.append([i/n, 'rgba(31, 119, 180, '+str(round(i/n,2))+')'])
scale.append([(i+1)/n, 'rgba(31, 119, 180, '+str(round((i+1)/n,2))+')'])
# scale
colorbar_trace = go.Scatter(
x=[None],
y=[None],
mode='markers',
hoverinfo='none',
marker=dict(
colorscale = scale,
cmin = 0,
cmax = n,
colorbar = dict(
outlinewidth = 1,
title= 'Intersections',
ticks= 'inside',
tickwidth = 2,
ticklen = 30,
tickmode = 'array',
tickvals = [ i for i in range(n+1)],
)
),
)
fig.add_trace(colorbar_trace)
fig
[42]:
# Save the corresponding Image
fig.write_image(file='../images/chuang clusters containing APP.png', format='png')
Heatmap the Genes in different readouts that belong to a Pathway¶
Here we intend to show how the overlap between the cluster and some pre-defined pathways.
[43]:
pathways = pd.DataFrame()
pathways['hsa05010'] = pd.read_csv('../data/KEGG_hsa05010.tsv', header=None)[0]
pathways.set_index('hsa05010', inplace=True)
pathways = pathways.merge( right=(geneData['Unweighted']==cluster_ids['Unweighted']), how='left', right_index=True, left_index=True)
pathways = pathways.merge( right=(geneData['Aβ40']==cluster_ids['Aβ40']), how='left', right_index=True, left_index=True)
pathways = pathways.merge( right=(geneData['Aβ42']==cluster_ids['Aβ42']), how='left', right_index=True, left_index=True)
pathways = pathways.merge( right=(geneData['sAPPα']==cluster_ids['sAPPα']), how='left', right_index=True, left_index=True)
pathways = pathways.merge( right=(geneData['sAPPβ']==cluster_ids['sAPPβ']), how='left', right_index=True, left_index=True)
symbols = pd.read_table('../data/network1_chuang.txt',header=None)
symbols.columns=['x','y','xsym','ysym']
s1 = pd.Series(data=symbols.xsym.values.tolist(), index=symbols.x.values.tolist())
s2 = pd.Series(data=symbols.ysym.values.tolist(), index=symbols.y.values.tolist())
s = s1.append(s2).drop_duplicates()
pathways = pathways.merge( right=pd.Series(s, name='symbol'), how='left', right_index=True, left_index=True)
pathways.dropna(inplace=True)
pathways.set_index(['symbol'], append=True,inplace=True)
pathways
[43]:
| Unweighted | Aβ40 | Aβ42 | sAPPα | sAPPβ | ||
|---|---|---|---|---|---|---|
| hsa05010 | symbol | |||||
| 102 | ADAM10 | False | False | False | False | False |
| 6868 | ADAM17 | False | False | False | False | False |
| 351 | APP | True | True | True | True | True |
| 8883 | NAE1 | False | True | True | False | False |
| 322 | APBB1 | True | True | True | True | True |
| ... | ... | ... | ... | ... | ... | ... |
| 29982 | NRBF2 | False | False | False | False | False |
| 5289 | PIK3C3 | False | False | False | False | False |
| 55102 | ATG2B | False | False | False | False | False |
| 26100 | WIPI2 | False | False | False | False | False |
| 55062 | WIPI1 | False | False | False | False | False |
310 rows × 5 columns
[44]:
df = pathways.loc[pathways.any(axis=1)]
fig = go.Figure(
layout = dict(
title = 'Genes in APP Cluster that belong to hsa05010',
yaxis_autorange = 'reversed',
width = 600,
)
)
colorscale = [
[0, 'white'],#white
[0.5, 'white'],#white
[0.5, colors[0]],#white
[1, colors[0]]
]
fig.add_trace(
go.Heatmap(
z = df.values.astype('int64').tolist(),
x = df.columns.values.tolist(),
y = df.index.get_level_values(1).values.tolist(),
xgap= 2,
ygap= 2,
colorscale = colorscale,
colorbar = dict(
thickness =25,
tickvals = [0.25, 0.75],
ticktext = ['False', 'True'],
outlinewidth=1,
)
)
)
fig